rm(list=ls())
# last edited: 15 december 2023

#install packages if you have not installed them before
# install.packages("pacman")
#install.packages("haven")
#install.packages("readr")
#install.packages("readxl")
#install.packages("openxlsx")
#install.packages("tidyverse")
#install.packages("estimatr")
#install.packages("dplyr")
#install.packages("modelsummary")
#install.packages("foreign")
#install.packages("texreg")
#install.packages("wesanderson")
#install.packages("dotwhisker")
#install.packages("arsenal")
#install.packages("ggplot2")
#install.packages("dotwhisker")
#install.packages("xtable")
#install.packages("readstata13")
#install.packages("coefplot")
#install.packages("metafor")
#install.packages("base")
#install.packages("xtable")

# Load all the packages
library(pacman)
library(haven)
library(readr)
library(readxl)
library(openxlsx)
library(tidyverse)
library(estimatr)
library(dplyr)
library(modelsummary)
library(foreign)
library(texreg)
library(wesanderson)
library(dotwhisker)
library(arsenal)
library(ggplot2)
library(arsenal)
library(dotwhisker)
library(xtable)
library(readstata13) 
library(tidyverse)
library(coefplot)
library(metafor)
library(base)
library(xtable)


# Set working directory (replace with your own)
setwd("~/Dropbox/JRRT/Digital Experiments Paper/R&P submission/Replication files/Data")

# Read datasets
df<-read_rds("meta_analysis.rds") 
campaign_stats <- read.csv("campaign_stats.csv", sep = ";")
df_1 = read_rds("Study1.rds")
df_2 = read_rds("Study2.rds")
df_3 = read_rds("Study3.rds")

##### Table 1: Campaign statistics ######
row.names(campaign_stats) <- campaign_stats$Metric
campaign_stats$Metric <- NULL
print(xtable(campaign_stats, caption = "Campaign statistics", label = "tab1", align = "l|rrr"), 
      caption.placement = "top")

###### Figure 1: Comparison of the three trials ######
res <- rma(estimate, sei=std.error, data = df, method="FE")
pdf("forest_plot.pdf")
# Custom labels for the studies
custom_labels <- c("Study 1: 2019 GE, PS Targeting", "Study 2: 2021 LE, PS Targeting", "Study 3: 2021 LE, Individual Targeting")
# Creating a forest plot with custom labels on the y-axis
print(forest(res, slab = custom_labels, xlab = "Estimated ITT on Voter Registration Rate"))
# Save the image
dev.off()

###### Table A.1: Descriptive statistics of Study 1 - Outcome variables ######
df_desc <- df_1 %>%
  dplyr::select(n_young_withmiss,n_young_withmiss_sc,
                Treatment)
tableone <- tableby(Treatment ~ ., data = df_desc)
summary(tableone, text = "latex", digits=1)

###### Table A.2: Descriptive statistics of Study 1 - Covariates ######
df_desc <- df_1 %>%
  dplyr::select(population_all,population_young, N_BAME_sum, ps_size, mean_age, overlap_number,
                Treatment)
tableone <- tableby(Treatment ~ ., data = df_desc, )
summary(tableone, text = "latex", digits=1)


###### Table A.3: Descriptive statistics of Study 2 - Outcome variables ######
df_desc <- df_2 %>%
  dplyr::select(n_nomiss_sc,registration_pre,registration,
                Treatment)
tableone <- tableby(Treatment ~ ., data = df_desc)
summary(tableone, text = "latex", digits=1)

###### Table A.4: Descriptive statistics of Study 2 - Covariates ######
df_desc <- df_2 %>%
  dplyr::select(population_all,population_young, N_BAME_sum, ps_size,mean_age,
                Treatment)
tableone <- tableby(Treatment ~ ., data = df_desc)
summary(tableone, text = "latex", digits=1)

###### Table A.5: Descriptive statistics of Study 3 - Outcome variables ######
df_desc <- df_3 %>%
  dplyr::select(registration, turnout,
                Treatment)
tableone <- tableby(Treatment ~ ., data = df_desc)
summary(tableone, text = "latex")

###### Table A.6: Descriptive statistics of Study 3 - Covariates ######
df_desc <- df_3 %>%
  dplyr::select(Gender,
                Treatment)
tableone <- tableby(Treatment ~ ., data = df_desc)
summary(tableone, text = "latex")


###### Table A.7: Balance check based on regression of treatment assignment on the covariates ######
m1<-lm_robust(Treatment~ps_size + population_young + N_BAME_sum, fixed_effects = Constituency, data=df_1)
m2<-lm_robust(Treatment~Pre+mean_age+ ps_size + N_BAME_sum+population_young, fixed_effects = blocks, data=df_2)
m3<-lm_robust(Treatment~sample+gender,fixed_effects = factor(county), data=df_3)

texreg(list(m1, m2, m3), include.ci = FALSE,
       custom.model.names = c("Study1", "Study2", "Study3"),
       caption="Balance check based on regression of treatment assignment on the covari-
ates",
       label = "tab:balance",
       digits = 4,
       custom.gof.rows = list("Constituency FE" = c("Yes", "Yes", "Yes")))

###### Table A.8: Number of individuals targeted with ads per county in Study 3 ######
df_filtered <- df_3 %>%
  filter(!(exp_group %in% c("noFB", "noFB, noSMS", "noFB, SMS", "noFB, SMS+callback")))
df_filtered = df_filtered %>%
  group_by(county) %>%
  summarize(
    individual_count = n()
  ) %>%
  ungroup()
latex_table <- xtable(df_filtered, align = c("l", "r", "r"))
print(latex_table, include.rownames = FALSE, caption = "Summary of Individual Counts by County (Excluding NoFB and NoFB, noSMS Groups)", caption.placement = "top")

####### Study 1 #######

# create cov names
covariate_names <- list(
  "(Intercept)" = "(Intercept)",
  "Treatment" = "Treatment",
  "ps_size" = "Sector Size",
  "population_all" = "All population",
  "population_young" = "Young population",
  "bame_prop"="Prop. of BAME",
  "overlap_number"="# of overlap",
  "young_prop" = "Prop. of young people",
  "mean_age"="Mean age"
)

###### Table C.9: Registration of young people at postcode sector level scaled by young population ######
model1 <- lm_robust(n_young_withmiss_sc ~ Treatment, fixed_effects = Constituency, data = df_1)
model2 <- lm_robust(n_young_withmiss_sc ~ Treatment + ps_size, fixed_effects = Constituency, data = df_1)
model3 <- lm_robust(n_young_withmiss_sc ~ Treatment + ps_size + bame_prop, fixed_effects = Constituency, data = df_1)

texreg(list(model1, model2, model3), include.ci = FALSE, omit.coef = "Constituency", caption =
         "Registration of young people at postcode sector level scaled by young population ",
       custom.model.names = c("Basic model", "Extended model", "Full model"),
       label = "tab:trialone",
       digits = 4,       custom.coef.map = covariate_names,
       custom.gof.rows = list("Constituency FE" = c("Yes", "Yes", "Yes"),
                              "PS size" = c("No", "Yes", "Yes"),
                              "Covariates" = c("No", "No", "Yes")))

###### Table C.10: Registration of all people at postcode sector level (Outcome variable: number of registrations scaled by all population) ###### 
model_all1 <- lm_robust(n_withmiss_sc ~ Treatment, fixed_effects = Constituency, data = df_1)
model_all2 <- lm_robust(n_withmiss_sc ~ Treatment + ps_size, fixed_effects = Constituency, data = df_1)
model_all3 <- lm_robust(n_withmiss_sc ~ Treatment + ps_size + bame_prop, fixed_effects = Constituency, data = df_1)

texreg(list(model_all1, model_all2, model_all3), include.ci = FALSE, omit.coef = "Constituency", caption =
         "Registration of all people at postcode sector level (Outcome variable: number of registrations scaled by all population)",
       custom.model.names = c("Basic model", "Extended model", "Full model"),
       label = "trialone_all_population",
       digits = 4,       custom.coef.map = covariate_names,
       custom.gof.rows = list("Constituency FE" = c("Yes", "Yes", "Yes"),
                              "PS size" = c("No", "Yes", "Yes"),
                              "Covariates" = c("No", "No", "Yes")))

###### Table C.11: Sectors with no young registrations (Outcome variable: variable indicating missing sectors) ######
model19 <- lm_robust(missing_young ~ Treatment, fixed_effects = Constituency, data = df_1)
model20 <- lm_robust(missing_young ~ Treatment + ps_size, fixed_effects = Constituency, data = df_1)
model21 <- lm_robust(missing_young ~ Treatment + ps_size + population_all + bame_prop, fixed_effects = Constituency, data = df_1)

texreg(list(model19, model20, model21), include.ci = FALSE, omit.coef = "Constituency", caption =
         "Sectors with no young registrations (Outcome variable: variable indicating missing sectors)",
       custom.model.names = c("Basic model", "Extended model", "Full model"),
       label = "taba2",
       digits = 4,       custom.coef.map = covariate_names,
       custom.gof.rows = list("Constituency FE" = c("Yes", "Yes", "Yes"),
                              "PS size" = c("No", "Yes", "Yes"),
                              "Covariates" = c("No", "No", "Yes")))

####### Table C.12: Controlling for the number of constituencies a postcode sector matches to #######
model_rc_ov <- lm_robust(n_young_withmiss_sc ~ Treatment+overlap_number, fixed_effects = Constituency, data = df_1)
model_rc_ov2 <- lm_robust(n_young_withmiss_sc ~ Treatment + ps_size+overlap_number, fixed_effects = Constituency, data = df_1)
model_rc_ov3 <- lm_robust(n_young_withmiss_sc ~ Treatment + ps_size + bame_prop+overlap_number, fixed_effects = Constituency, data = df_1)

texreg(list(model_rc_ov, model_rc_ov2, model_rc_ov3), include.ci = FALSE, omit.coef = "Constituency", caption =
         "Controlling for the number of constituencies a postcode sector matches to",
       custom.model.names = c("Basic model", "Extended model", "Full model"),
       label = "tab:overlap",
       digits = 4,       custom.coef.map = covariate_names,
       custom.gof.rows = list("Constituency FE" = c("Yes", "Yes", "Yes"),
                              "PS size" = c("No", "Yes", "Yes"),
                              "Covariates" = c("No", "No", "Yes")))

####### Table C.13: The Effect of Complete vs. Partial Match between Postcode Sector and Assigned Constituency (Outcome variable: number of registrations scaled by young population #######
model_rc1 <- lm_robust(n_young_withmiss_sc ~ Treatment + ps_size + bame_prop, fixed_effects = Constituency, data = df_1,
                       subset = overlap_number==1)
model_rc2 <- lm_robust(n_young_withmiss_sc ~ Treatment + ps_size + bame_prop, fixed_effects = Constituency, data = df_1,
                       subset = overlap_number==1 | overlap_number==2)
model_rc3 <- lm_robust(n_young_withmiss_sc ~ Treatment + ps_size + bame_prop, fixed_effects = Constituency, data = df_1,
                       subset = overlap_number==1 | overlap_number==2 | overlap_number==3)
model_rc4 <- lm_robust(n_young_withmiss_sc ~ Treatment + ps_size + bame_prop, fixed_effects = Constituency, data = df_1,
                       subset = overlap_number==1 | overlap_number==2 | overlap_number==3 | overlap_number==4)
model_rc5 <- lm_robust(n_young_withmiss_sc ~ Treatment + ps_size + bame_prop, fixed_effects = Constituency, data = df_1,
                       subset = overlap_number==1 | overlap_number==2 | overlap_number==3 | overlap_number==4 | overlap_number==5)
# Create a list of models
model_list <- list(model_rc1, model_rc2, model_rc3, model_rc4, model_rc5)
# Create custom model names
custom_model_names <- c("Overlap<2", "Overlap<3", "Overlap<4", "Overlap<5", "Overlap<6")

texreg::texreg(model_list,include.ci = FALSE,omit.coef = "Constituency",
  caption = "The Effect of Complete vs. Partial Match between Postcode Sector and Assigned Constituency",
  custom.model.names = custom_model_names,label = "overlap2",digits = 4)

###### Table C.14: Registration of young people at postcode sector level (Outcome variable:absolute number of registrations) ######
model_young1 <- lm_robust(n_young_withmiss ~ Treatment, fixed_effects = Constituency, data = df_1)
model_young2 <- lm_robust(n_young_withmiss ~ Treatment + ps_size, fixed_effects = Constituency, data = df_1)
model_young3 <- lm_robust(n_young_withmiss ~ Treatment + ps_size + bame_prop + young_prop, fixed_effects = Constituency, data = df_1)

texreg(list(model_young1, model_young2, model_young3), include.ci = FALSE, omit.coef = "Constituency", caption =
         "Registration of young people at postcode sector level (Excluding missing sectors)",
       custom.model.names = c("Basic model", "Extended model", "Full model"),
       label = "trialone_full",
       digits = 4,       custom.coef.map = covariate_names,
       custom.gof.rows = list("Constituency FE" = c("Yes", "Yes", "Yes"),
                              "PS size" = c("No", "Yes", "Yes"),
                              "Covariates" = c("No", "No", "Yes")))

###### Table C.15: Heterogeneous effects: Registration of young people at postcode sector level scaled by young population ######
modelhte3 <- lm_robust(n_young_withmiss_sc ~ Treatment*mean_age + ps_size + bame_prop, fixed_effects = Constituency, data = df_1)

texreg(list(modelhte3), include.ci = FALSE, omit.coef = "Constituency", caption =
         "Heterogeneous effects: Registration of young people at postcode sector level scaled by young population ",
       custom.model.names = c("Preregistered HTE"),
       label = "tab:trialone_hte",
       digits = 4,
       custom.gof.rows = list("Constituency FE" = c("Yes"),
                              "PS size" = c("Yes"),
                              "Covariates" = c("Yes")))

##### Figure C.2: The effect of complete vs. partial match between postcode sector and its assigned constituency #####
coef_df <- bind_rows(
  tidy(model_rc1, conf.int = TRUE) %>% mutate(model = "Complete match"),
  tidy(model_rc2, conf.int = TRUE) %>% mutate(model = "Matches to less than 3"),
  tidy(model_rc3, conf.int = TRUE) %>% mutate(model = "Matches to less than 4"),
  tidy(model_rc4, conf.int = TRUE) %>% mutate(model = "Matches to less than 5"),
  tidy(model_rc5, conf.int = TRUE) %>% mutate(model = "Matches to less than 6"))

coef_df <- coef_df %>%
  filter(term == "Treatment")

# Modify levels of model variable to change the order
coef_df$model <- factor(coef_df$model, levels = c("Matches to less than 6",
                                                  "Matches to less than 5",
                                                  "Matches to less than 4",
                                                  "Matches to less than 3",
                                                  "Complete match"))

ggplot(coef_df, aes(x = model, y = estimate, color = model)) +
  geom_point(position = position_dodge(width = 0.5), size = 3) +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high),
                position = position_dodge(width = 0.5),
                width = 0.2) +
  geom_hline(yintercept = 0, color = "grey") +
  xlab("") +
  ylab("Coefficient") +
  ggtitle("Experiment 1") +            # Main title "Experiment 1"
  labs(subtitle = "The Effect of Complete vs. Partial Match between Postcode Sector and Assigned Constituency") +  # Subtitle
  theme_bw() +
  coord_flip() +
  guides(color = FALSE) +
  scale_color_manual(values = wes_palette('Darjeeling1')) +
  ylim(-0.09, 0.09)

####### Study 2 ####### 

covariate_names <- list(
  "(Intercept)" = "(Intercept)",
  "Treatment" = "Treatment",
  "Pre"="Pre-treatment registration",
  "mean_age" = "Mean age",
  "ps_size"="Sector size",
  "population_all" = "All population",
  "population_young" = "Young population",
  "bame_prop.y"="Prop. of BAME",
  "young_prop"="Prop. of young people"
)

###### Table D.16: Registration of postal voters at the postcode sector level scaled by population ######
model1_exp2<-lm_robust(n_nomiss_sc~Treatment+Pre+blocks, data=df_2)
model2_exp2<-lm_robust(n_nomiss_sc~Treatment+Pre+ps_size, fixed_effects = blocks, data=df_2)
model3_exp2<-lm_robust(n_nomiss_sc~Treatment+Pre+mean_age+ ps_size + bame_prop.y, fixed_effects = blocks,data=df_2)

texreg(list(model1_exp2, model2_exp2, model3_exp2), include.ci = FALSE,
       custom.model.names = c("Basic model", "Extended model", "Full model"),
       caption="Registration of postal voters at the postcode sector level scaled by population",
       label = "tab:trialtwo",
       digits = 4,       custom.coef.map = covariate_names,
       custom.gof.rows = list("Constituency FE" = c("Yes", "Yes", "Yes"),
                              "Pre-treatment N" = c("Yes", "Yes", "Yes"),
                              "PS size" = c("No", "Yes", "Yes"),
                              "Covariates" = c("No", "No", "Yes")))

###### Table D.17: Sectors with no postal registrations (Outcome variable: variable indicating missing sectors) ######
model4_exp2<-lm_robust(missing~Treatment, fixed_effects = blocks, data=df_2)
model5_exp2<-lm_robust(missing~Treatment+Pre, fixed_effects = blocks, data=df_2)
model6_exp2<-lm_robust(missing~Treatment+Pre+mean_age+ ps_size + bame_prop.y, fixed_effects = blocks,data=df_2)

texreg(list(model4_exp2, model5_exp2, model6_exp2), include.ci = FALSE,
       custom.model.names = c("Basic model", "Extended model", "Full model"),
       caption="Sectors with no postal registrations",
       label = "taba22",
       digits = 4,       custom.coef.map = covariate_names,
       custom.gof.rows = list("Constituency FE" = c("Yes", "Yes", "Yes"),
                              "PS size" = c("No", "Yes", "Yes"),
                              "Covariates" = c("No", "No", "Yes")))
###### Table D.18: Registration of postal voters at the postcode sector level ######
model1_exp2_raw<-lm_robust(registration~Treatment+Pre, fixed_effects = blocks, data=df_2)
model2_exp2_raw<-lm_robust(registration~Treatment+Pre+ps_size, fixed_effects = blocks, data=df_2)
model3_exp2_raw<-lm_robust(registration~Treatment+Pre+mean_age+ ps_size + bame_prop.y + young_prop, fixed_effects = blocks,data=df_2)

texreg(list(model1_exp2_raw, model2_exp2_raw, model3_exp2_raw), include.ci = FALSE,
       custom.model.names = c("Basic model", "Extended model", "Full model"),
       caption="Registration of postal voters at the postcode sector level",
       label = "tab:trialtwo_raw",
       digits = 4,       custom.coef.map = covariate_names,
       custom.gof.rows = list("Regional and Size FE" = c("Yes", "Yes", "Yes"),
                              "PS size" = c("No", "Yes", "Yes"),
                              "Covariates" = c("No", "No", "Yes")))

###### Table D.19: Heterogeneous Effects: Registration of postal voters at the postcode sector level scaled by population ######
model4_exp2<-lm_robust(n_nomiss_sc~Treatment*mean_age+Pre+ ps_size + bame_prop.y, fixed_effects = blocks,data=df_2)
model5_exp2<-lm_robust(n_nomiss_sc~Treatment*bame_prop.y+Pre+ ps_size + mean_age, fixed_effects = blocks,data=df_2)

texreg(list(model4_exp2, model5_exp2), include.ci = FALSE,
       custom.model.names = c("Pre-registered HTE 1", "Pre-registered HTE 2"),
       caption="Registration of postal voters at the postcode sector level scaled by population",
       label = "tab:trialtwo",       digits = 4,
       custom.gof.rows = list("Constituency FE" = c("Yes", "Yes"),
                              "Pre-treatment N" = c("Yes", "Yes"),
                              "PS size" = c("Yes", "Yes"),
                              "Covariates" = c("Yes", "Yes")))

####### Study 3 ####### 
covariate_names <- list(
  "(Intercept)" = "(Intercept)",
  "Treatment" = "Treatment",
  "sample"="Sample",
  "genderMale" = "Gender - Male",
  "genderOther/Unknown" = "Gender - Unknown/Other"
)

###### Table E.20: Individual-level voter registration ######
model_trial3_1<-lm_robust(registration~Treatment, fixed_effects =factor(county), clusters= clusters,data=df_3) 
model_trial3_2_1<-lm_robust(registration~Treatment+sample, fixed_effects =factor(county),clusters= clusters,data=df_3)
model_trial3_2<-lm_robust(registration~Treatment+sample+gender, fixed_effects =factor(county),clusters= clusters,data=df_3)

texreg(list(model_trial3_1, model_trial3_2_1, model_trial3_2), include.ci = FALSE,
       caption = "Individual-level voter registration",
       stars = c(0.001, 0.01,0.05),digits=4,
       custom.model.names = c("Basic model", "Extended model", "Full model"),
       label = "tab:trialthree",
       custom.coef.map = covariate_names,
       custom.gof.rows = list("County FE" = c("Yes", "Yes", "Yes"),
                              "Sample" = c("No", "Yes", "Yes"),
                              "Covariates" = c("No", "No", "Yes")))

###### Table E.21: Individual-level voter registration - callback option ######
model_trial3_5<-lm_robust(registration~FB_callback, fixed_effects =factor(county), clusters= clusters,data=df_3)
model_trial3_6_1<-lm_robust(registration~FB_callback+sample, fixed_effects =factor(county),clusters= clusters,data=df_3)
model_trial3_6<-lm_robust(registration~FB_callback+sample+gender, fixed_effects =factor(county),clusters= clusters,data=df_3)

texreg(list(model_trial3_5, model_trial3_6_1, model_trial3_6), include.ci = FALSE,
       caption = "Individual-level voter registration - callback option",
       stars = c(0.001, 0.01,0.05),digits=4,
       custom.model.names = c("Basic model", "Extended model", "Full model"),
       label = "tab:trialthree_callback",
       custom.gof.rows = list("County FE" = c("Yes", "Yes", "Yes"),
                              "Sample" = c("No", "Yes", "Yes"),
                              "Covariates" = c("No", "No", "Yes")))

###### Table E.22: Individual-level turnout ######
model_trial3_1_turnout<-lm_robust(turnout~Treatment+sample, fixed_effects =factor(county), clusters= clusters,data=df_3) 
model_trial3_2_1_turnout<-lm_robust(turnout~Treatment+sample, fixed_effects =factor(county),clusters= clusters,data=df_3)
model_trial3_2_turnout<-lm_robust(turnout~Treatment+sample+gender, fixed_effects =factor(county),clusters= clusters,data=df_3)

texreg(list(model_trial3_1_turnout, model_trial3_2_1_turnout, model_trial3_2_turnout), include.ci = FALSE,
       caption = "Individual-level turnout",
       stars = c(0.001, 0.01,0.05),digits=4,
       custom.model.names = c("Basic model", "Extended model", "Full model"),
       label = "tab:trialthree_turnout",
       custom.coef.map = covariate_names,
       custom.gof.rows = list("County FE" = c("Yes", "Yes", "Yes"),
                              "Sample" = c("No", "Yes", "Yes"),
                              "Covariates" = c("No", "No", "Yes")))

